# Licensed under a 3-clause BSD style license - see LICENSE.rst

"""
This module tests some of the methods related to the ``HTML``
reader/writer and aims to document its functionality.

Requires `BeautifulSoup <http://www.crummy.com/software/BeautifulSoup/>`_
to be installed.
"""

import os
from io import StringIO
from pathlib import Path

import numpy as np
import pytest

from astropy.io import ascii
from astropy.io.ascii import core, html
from astropy.table import Table
from astropy.utils.compat.optional_deps import HAS_BLEACH, HAS_BS4

from .common import setup_function, teardown_function  # noqa: F401

if HAS_BS4:
    from bs4 import BeautifulSoup, FeatureNotFound


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_soupstring():
    """
    Check that a ``SoupString`` compares equal to the markup it wraps,
    passes ``isinstance`` checks for ``str``, and keeps a reference to
    the soup object it was built from.
    """
    markup = "<html><head></head><body><p>foo</p></body></html>"
    parsed = BeautifulSoup(markup, "html.parser")
    wrapped = html.SoupString(parsed)

    # The wrapper must be usable wherever a plain string is expected ...
    assert isinstance(wrapped, str)
    assert isinstance(wrapped, html.SoupString)
    assert wrapped == markup
    # ... while still exposing the very same parsed object.
    assert wrapped.soup is parsed


def test_listwriter():
    """
    Check that ``ListWriter.write`` appends every written item, in
    order, to the list the writer was constructed with.
    """
    sink = []
    writer = html.ListWriter(sink)

    # Mix integers and single characters to show items are stored as-is
    for item in [*range(5), *"abcde"]:
        writer.write(item)

    assert sink == [0, 1, 2, 3, 4, "a", "b", "c", "d", "e"]


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_identify_table():
    """
    Check that ``identify_table()`` reports whether a given
    BeautifulSoup tag is the table that should be processed.
    """
    # Neither a non-<table> document nor ``None`` is ever the wanted table
    document = BeautifulSoup("<html><body></body></html>", "html.parser")
    for not_a_table in (document, None):
        assert html.identify_table(not_a_table, {}, 0) is False

    table_tag = BeautifulSoup(
        '<table id="foo"><tr><th>A</th></tr><tr><td>B</td></tr></table>',
        "html.parser",
    ).table

    # Each case is (htmldict, table number, expected verdict)
    cases = [
        # Without an explicit table_id the first table (index 1) is selected
        ({}, 2, False),
        ({}, 1, True),
        # Same checks with the index passed explicitly
        ({"table_id": 2}, 1, False),
        ({"table_id": 1}, 1, True),
        # Selection by the string ``id`` attribute
        ({"table_id": "bar"}, 1, False),
        ({"table_id": "foo"}, 1, True),
    ]
    for htmldict, numtable, verdict in cases:
        assert html.identify_table(table_tag, htmldict, numtable) is verdict


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_missing_data():
    """
    Check that missing cells are read as masked entries of an integer
    column, both for blank cells and for a caller-specified marker.
    """
    blank_cell_rows = [
        "<table>",
        "<tr><th>A</th></tr>",
        "<tr><td></td></tr>",
        "<tr><td>1</td></tr>",
        "</table>",
    ]
    marker_rows = [
        "<table>",
        "<tr><th>A</th></tr>",
        "<tr><td>...</td></tr>",
        "<tr><td>1</td></tr>",
        "</table>",
    ]

    scenarios = [
        # Default behaviour: a blank cell needs no extra options
        (blank_cell_rows, {}),
        # The marker '...' is declared as missing through fill_values
        (marker_rows, {"fill_values": [("...", "0")]}),
    ]
    for rows, extra_kwargs in scenarios:
        parsed = Table.read(rows, format="ascii.html", **extra_kwargs)
        assert parsed.masked is False
        assert np.all(parsed["A"].mask == [True, False])
        assert parsed["A"].dtype.kind == "i"


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_rename_cols():
    """
    Check that ``names`` renames the columns of a table being read and
    that ``include_names`` then refers to the renamed columns.
    """
    rows = [
        "<table>",
        "<tr><th>A</th> <th>B</th></tr>",
        "<tr><td>1</td><td>2</td></tr>",
        "</table>",
    ]
    swapped = ["B", "A"]

    # Swapping the names relabels the columns without dropping any row
    renamed = Table.read(rows, format="ascii.html", names=swapped)
    assert renamed.colnames == ["B", "A"]
    assert len(renamed) == 1

    # After the swap, "A" is the second source column (value 2)
    only_a = Table.read(
        rows, format="ascii.html", names=swapped, include_names=["A"]
    )
    assert only_a.colnames == ["A"]
    assert len(only_a) == 1
    assert np.all(only_a["A"] == 2)


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_no_names():
    """
    Check reading a table without a header row, with auto-generated
    column names and with names supplied by the caller.
    """
    rows = ["<table>", "<tr><td>1</td></tr>", "<tr><td>2</td></tr>", "</table>"]

    # Each case is (extra read kwargs, expected column names)
    cases = [
        ({}, ["col1"]),
        ({"names": ["a"]}, ["a"]),
    ]
    for extra_kwargs, colnames in cases:
        parsed = Table.read(rows, format="ascii.html", **extra_kwargs)
        assert parsed.colnames == colnames
        assert len(parsed) == 2


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_identify_table_fail():
    """
    Check that an unknown ``table_id`` raises ``InconsistentTableError``
    with a message naming the id or table number that was not found.
    """
    table_in = ['<table id="foo"><tr><th>A</th></tr>', "<tr><td>B</td></tr></table>"]

    # Each case is (requested table_id, regex the error message must match)
    cases = [
        ("bad_id", "ERROR: HTML table id 'bad_id' not found$"),
        (3, "ERROR: HTML table number 3 not found$"),
    ]
    for table_id, message in cases:
        with pytest.raises(core.InconsistentTableError, match=message):
            Table.read(
                table_in,
                format="ascii.html",
                htmldict={"table_id": table_id},
                guess=False,
            )


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_backend_parsers():
    """
    Check that the back-end parser can be chosen through ``htmldict``
    and that an unknown parser name raises ``FeatureNotFound``.
    """

    def read_with(parser):
        # Read the sample file with the requested BeautifulSoup parser
        return Table.read(
            "data/html2.html",
            format="ascii.html",
            htmldict={"parser": parser},
            guess=False,
        )

    for parser in ("lxml", "xml", "html.parser", "html5lib"):
        try:
            read_with(parser)
        except FeatureNotFound:
            # "html.parser" must never be reported missing; any other
            # parser is allowed to be absent and is then skipped.
            if parser == "html.parser":
                raise

    # An unknown parser name must make the read fail
    with pytest.raises(FeatureNotFound):
        read_with("foo")


@pytest.mark.skipif(HAS_BS4, reason="requires no BeautifulSoup4")
def test_htmlinputter_no_bs4():
    """
    Check that processing lines raises ``OptionalTableImportError``
    when BeautifulSoup is not installed.
    """
    # Construction happens outside the ``raises`` block so that only
    # ``process_lines`` is allowed to raise the expected error.
    reader_input = html.HTMLInputter()

    with pytest.raises(core.OptionalTableImportError):
        reader_input.process_lines([])


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_htmlinputter():
    """
    Check that ``HTMLInputter.get_lines`` converts HTML input into the
    ``<tr>`` elements of the selected table, and raises
    ``InconsistentTableError`` when the requested table does not exist.

    Uses ``data/html.html`` as sample input.
    """
    # Pin the encoding so the fixture is decoded the same way on every
    # platform rather than with the locale's default encoding.
    with open("data/html.html", encoding="utf-8") as fd:
        table = fd.read()

    inputter = html.HTMLInputter()
    inputter.html = {}

    # In absence of table_id, defaults to the first table
    expected = [
        "<tr><th>Column 1</th><th>Column 2</th><th>Column 3</th></tr>",
        "<tr><td>1</td><td>a</td><td>1.05</td></tr>",
        "<tr><td>2</td><td>b</td><td>2.75</td></tr>",
        "<tr><td>3</td><td>c</td><td>-1.25</td></tr>",
    ]
    assert [str(x) for x in inputter.get_lines(table)] == expected

    # Should raise an InconsistentTableError if the table is not found
    inputter.html = {"table_id": 4}
    with pytest.raises(core.InconsistentTableError):
        inputter.get_lines(table)

    # Identification by string ID
    inputter.html["table_id"] = "second"
    expected = [
        "<tr><th>Column A</th><th>Column B</th><th>Column C</th></tr>",
        "<tr><td>4</td><td>d</td><td>10.5</td></tr>",
        "<tr><td>5</td><td>e</td><td>27.5</td></tr>",
        "<tr><td>6</td><td>f</td><td>-12.5</td></tr>",
    ]
    assert [str(x) for x in inputter.get_lines(table)] == expected

    # Identification by integer index
    inputter.html["table_id"] = 3
    expected = [
        "<tr><th>C1</th><th>C2</th><th>C3</th></tr>",
        "<tr><td>7</td><td>g</td><td>105.0</td></tr>",
        "<tr><td>8</td><td>h</td><td>275.0</td></tr>",
        "<tr><td>9</td><td>i</td><td>-125.0</td></tr>",
    ]
    assert [str(x) for x in inputter.get_lines(table)] == expected


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_htmlsplitter():
    """
    Check that ``HTMLSplitter`` turns ``SoupString`` rows into lists of
    header and data cell values, and rejects invalid input.
    """

    def as_row(markup):
        # Wrap the <tr> element of ``markup`` the way the inputter would
        return html.SoupString(BeautifulSoup(markup, "html.parser").tr)

    rows = [
        as_row("<table><tr><th>Col 1</th><th>Col 2</th></tr></table>"),
        as_row("<table><tr><td>Data 1</td><td>Data 2</td></tr></table>"),
    ]
    splitter = html.HTMLSplitter()
    assert list(splitter(rows)) == [["Col 1", "Col 2"], ["Data 1", "Data 2"]]

    # A plain string among the rows must trigger a TypeError
    rows.append("<tr><td>Data 3</td><td>Data 4</td></tr>")
    with pytest.raises(TypeError):
        list(splitter(rows))

    # An empty list of rows must trigger an error as well
    with pytest.raises(core.InconsistentTableError):
        list(splitter([]))


@pytest.mark.parametrize(
    "get_table",
    [
        lambda path: os.fspath(path),
        lambda path: Path(path),
        # Pin the encoding so the fixture is not decoded with the
        # locale's default encoding.
        lambda path: Path(path).read_text(encoding="utf-8"),
    ],
)
@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_htmlheader_start(get_table):
    """
    Check that ``HTMLHeader.start_line`` returns the index of the first
    header row. Uses ``data/html.html`` as sample input.

    ``get_table`` turns the fixture path into the object handed to the
    inputter: a path string, a ``Path``, or the file's text.
    """
    table = get_table("data/html.html")

    inputter = html.HTMLInputter()
    inputter.html = {}
    header = html.HTMLHeader()

    # (table_id, expected header row); ``None`` leaves table_id unset so
    # the first table is selected.
    cases = [
        (None, "<tr><th>Column 1</th><th>Column 2</th><th>Column 3</th></tr>"),
        ("second", "<tr><th>Column A</th><th>Column B</th><th>Column C</th></tr>"),
        (3, "<tr><th>C1</th><th>C2</th><th>C3</th></tr>"),
    ]
    for table_id, expected_header in cases:
        if table_id is not None:
            inputter.html["table_id"] = table_id
        lines = inputter.get_lines(table)
        assert str(lines[header.start_line(lines)]) == expected_header

    # start_line should return None if no valid header is found
    lines = [
        html.SoupString(
            BeautifulSoup("<table><tr><td>Data</td></tr></table>", "html.parser").tr
        ),
        html.SoupString(BeautifulSoup("<p>Text</p>", "html.parser").p),
    ]
    assert header.start_line(lines) is None

    # Should raise an error if a non-SoupString is present
    lines.append("<tr><th>Header</th></tr>")
    with pytest.raises(TypeError):
        header.start_line(lines)


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_htmldata():
    """
    Check that ``HTMLData.start_line`` returns the index of the first
    data row and ``HTMLData.end_line`` the index just past the last
    data row. Uses ``data/html.html`` as sample input.
    """
    # Pin the encoding so the fixture is decoded the same way on every
    # platform rather than with the locale's default encoding.
    with open("data/html.html", encoding="utf-8") as fd:
        table = fd.read()

    inputter = html.HTMLInputter()
    inputter.html = {}
    data = html.HTMLData()

    # (table_id, first data row, last data row); ``None`` leaves
    # table_id unset so the first table is selected.
    cases = [
        (
            None,
            "<tr><td>1</td><td>a</td><td>1.05</td></tr>",
            "<tr><td>3</td><td>c</td><td>-1.25</td></tr>",
        ),
        (
            "second",
            "<tr><td>4</td><td>d</td><td>10.5</td></tr>",
            "<tr><td>6</td><td>f</td><td>-12.5</td></tr>",
        ),
        (
            3,
            "<tr><td>7</td><td>g</td><td>105.0</td></tr>",
            "<tr><td>9</td><td>i</td><td>-125.0</td></tr>",
        ),
    ]
    for table_id, first_row, last_row in cases:
        if table_id is not None:
            inputter.html["table_id"] = table_id
        lines = inputter.get_lines(table)
        assert str(lines[data.start_line(lines)]) == first_row
        # end_line returns the index of the last data element + 1
        assert str(lines[data.end_line(lines) - 1]) == last_row

    # start_line should raise an error if no table data exists
    lines = [
        html.SoupString(BeautifulSoup("<div></div>", "html.parser").div),
        html.SoupString(BeautifulSoup("<p>Text</p>", "html.parser").p),
    ]
    with pytest.raises(core.InconsistentTableError):
        data.start_line(lines)

    # end_line should return None if no table data exists
    assert data.end_line(lines) is None

    # Should raise an error if a non-SoupString is present
    lines.append("<tr><td>Data</td></tr>")
    with pytest.raises(TypeError):
        data.start_line(lines)
    with pytest.raises(TypeError):
        data.end_line(lines)


def test_multicolumn_write():
    """
    Check that the HTML writer emits multidimensional columns (those
    with iterable elements) as several ``<td>`` cells under a single
    ``<th>`` carrying a ``colspan`` attribute.
    """
    scalars = [1, 2, 3]
    pairs = [(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)]
    triples = [("a", "a", "a"), ("b", "b", "b"), ("c", "c", "c")]
    tbl = Table([scalars, pairs, triples], names=("C1", "C2", "C3"))

    rendered = html.HTML().write(tbl)[0].strip()

    reference = """\
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="text/html;charset=UTF-8" http-equiv="Content-type"/>
 </head>
 <body>
  <table>
   <thead>
    <tr>
     <th>C1</th>
     <th colspan="2">C2</th>
     <th colspan="3">C3</th>
    </tr>
   </thead>
   <tr>
    <td>1</td>
    <td>1.0</td>
    <td>1.0</td>
    <td>a</td>
    <td>a</td>
    <td>a</td>
   </tr>
   <tr>
    <td>2</td>
    <td>2.0</td>
    <td>2.0</td>
    <td>b</td>
    <td>b</td>
    <td>b</td>
   </tr>
   <tr>
    <td>3</td>
    <td>3.0</td>
    <td>3.0</td>
    <td>c</td>
    <td>c</td>
    <td>c</td>
   </tr>
  </table>
 </body>
</html>
    """
    assert rendered == reference.strip()


@pytest.mark.skipif(not HAS_BLEACH, reason="requires bleach")
def test_multicolumn_write_escape():
    """
    Check that a multidimensional column listed in ``raw_html_cols`` is
    written with ``colspan`` and with its HTML markup left unescaped.
    """
    scalars = [1, 2, 3]
    pairs = [(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)]
    markup_cells = [("<a></a>", "<a></a>", "a"), ("<b></b>", "b", "b"), ("c", "c", "c")]
    tbl = Table([scalars, pairs, markup_cells], names=("C1", "C2", "C3"))

    # Only C3 is declared as raw HTML
    rendered = html.HTML(htmldict={"raw_html_cols": "C3"}).write(tbl)[0].strip()

    reference = """\
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="text/html;charset=UTF-8" http-equiv="Content-type"/>
 </head>
 <body>
  <table>
   <thead>
    <tr>
     <th>C1</th>
     <th colspan="2">C2</th>
     <th colspan="3">C3</th>
    </tr>
   </thead>
   <tr>
    <td>1</td>
    <td>1.0</td>
    <td>1.0</td>
    <td><a></a></td>
    <td><a></a></td>
    <td>a</td>
   </tr>
   <tr>
    <td>2</td>
    <td>2.0</td>
    <td>2.0</td>
    <td><b></b></td>
    <td>b</td>
    <td>b</td>
   </tr>
   <tr>
    <td>3</td>
    <td>3.0</td>
    <td>3.0</td>
    <td>c</td>
    <td>c</td>
    <td>c</td>
   </tr>
  </table>
 </body>
</html>
    """
    assert rendered == reference.strip()


def test_write_no_multicols():
    """
    Check that with ``multicol`` set to False the HTML writer keeps each
    multidimensional column in a single ``<td>`` instead of spreading
    it over several cells with ``colspan``.
    """
    scalars = [1, 2, 3]
    pairs = [(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)]
    triples = [("a", "a", "a"), ("b", "b", "b"), ("c", "c", "c")]
    tbl = Table([scalars, pairs, triples], names=("C1", "C2", "C3"))

    writer = html.HTML({"multicol": False})
    rendered = writer.write(tbl)[0].strip()

    reference = """\
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="text/html;charset=UTF-8" http-equiv="Content-type"/>
 </head>
 <body>
  <table>
   <thead>
    <tr>
     <th>C1</th>
     <th>C2</th>
     <th>C3</th>
    </tr>
   </thead>
   <tr>
    <td>1</td>
    <td>1.0 .. 1.0</td>
    <td>a .. a</td>
   </tr>
   <tr>
    <td>2</td>
    <td>2.0 .. 2.0</td>
    <td>b .. b</td>
   </tr>
   <tr>
    <td>3</td>
    <td>3.0 .. 3.0</td>
    <td>c .. c</td>
   </tr>
  </table>
 </body>
</html>
    """
    assert rendered == reference.strip()


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_multicolumn_read():
    """
    Check that the HTML reader builds multidimensional columns from
    ``<th>`` elements that carry a ``colspan`` attribute.

    Also ensures that a single string element in a multidimensional
    column makes the whole column be read as strings.
    """
    parsed = Table.read("data/html2.html", format="ascii.html")

    # Column A holds two strings per row; column B is a plain float column
    fixed_width_str = np.dtype((str, 21))
    row_dtype = [("A", fixed_width_str, (2,)), ("B", "<f8")]
    rows = [(["1", "2.5000000000000000001"], 3), (["1a", "1"], 3.5)]
    reference = Table(np.array(rows, dtype=row_dtype))

    assert np.all(parsed == reference)


@pytest.mark.skipif(not HAS_BLEACH, reason="requires bleach")
def test_raw_html_write():
    """
    Check that columns named in ``raw_html_cols`` are written without
    escaping their HTML markup, while other columns are escaped.
    """
    t = Table([["<em>x</em>"], ["<em>y</em>"]], names=["a", "b"])

    def render(raw_cols):
        # Write ``t`` to HTML with ``raw_cols`` declared as raw HTML
        buf = StringIO()
        t.write(buf, format="ascii.html", htmldict={"raw_html_cols": raw_cols})
        return buf.getvalue()

    only_a_raw = """\
   <tr>
    <td><em>x</em></td>
    <td>&lt;em&gt;y&lt;/em&gt;</td>
   </tr>"""
    # A single raw column may be given as a string or as a list
    assert only_a_raw in render("a")
    assert only_a_raw in render(["a"])

    both_raw = """\
   <tr>
    <td><em>x</em></td>
    <td><em>y</em></td>
   </tr>"""
    # Both columns declared as raw HTML
    assert both_raw in render(["a", "b"])


@pytest.mark.skipif(not HAS_BLEACH, reason="requires bleach")
def test_raw_html_write_clean():
    """
    Check that raw HTML columns are still cleaned: tags outside the
    allowed set are escaped, and ``raw_html_clean_kwargs`` can extend
    the set of allowed tags.
    """
    import bleach

    t = Table(
        [["<script>x</script>"], ["<p>y</p>"], ["<em>y</em>"]], names=["a", "b", "c"]
    )

    def render(htmldict):
        # Write ``t`` to HTML using the given htmldict options
        buf = StringIO()
        t.write(buf, format="ascii.html", htmldict=htmldict)
        return buf.getvalue()

    # By default <script> and <p> get escaped but <em> does not
    default_clean = """\
   <tr>
    <td>&lt;script&gt;x&lt;/script&gt;</td>
    <td>&lt;p&gt;y&lt;/p&gt;</td>
    <td><em>y</em></td>
   </tr>"""
    assert default_clean in render({"raw_html_cols": t.colnames})

    # Adding <p> to the allowed tags lets it through unescaped
    with_p_allowed = """\
   <tr>
    <td>&lt;script&gt;x&lt;/script&gt;</td>
    <td><p>y</p></td>
    <td><em>y</em></td>
   </tr>"""
    allowed_tags = [*bleach.ALLOWED_TAGS, "p"]
    options = {
        "raw_html_cols": t.colnames,
        "raw_html_clean_kwargs": {"tags": allowed_tags},
    }
    assert with_p_allowed in render(options)


def test_write_table_html_fill_values():
    """
    Check that ``fill_values`` replaces every matching value when a
    table is written as HTML.
    """

    def to_html(tbl, **write_kwargs):
        # Render ``tbl`` with the HTML writer and return the text
        buf = StringIO()
        ascii.write(tbl, buf, format="html", **write_kwargs)
        return buf.getvalue()

    source = Table([[1], [2]], names=("a", "b"))
    replaced = to_html(source, fill_values=("1", "Hello world"))

    # Writing the already-substituted table must give the same output
    reference = Table([["Hello world"], [2]], names=("a", "b"))
    assert replaced == to_html(reference)


def test_write_table_html_fill_values_optional_columns():
    """
    Check that a ``fill_values`` entry naming a column replaces matching
    values in that column only.
    """

    def to_html(tbl, **write_kwargs):
        # Render ``tbl`` with the HTML writer and return the text
        buf = StringIO()
        ascii.write(tbl, buf, format="html", **write_kwargs)
        return buf.getvalue()

    # Both columns hold the value 1, but only "b" is named in fill_values
    source = Table([[1], [1]], names=("a", "b"))
    replaced = to_html(source, fill_values=("1", "Hello world", "b"))

    reference = Table([[1], ["Hello world"]], names=("a", "b"))
    assert replaced == to_html(reference)


def test_write_table_html_fill_values_masked():
    """
    Check that a ``fill_values`` entry keyed on ``ascii.masked``
    replaces masked values only.
    """

    def to_html(tbl, **write_kwargs):
        # Render ``tbl`` with the HTML writer and return the text
        buf = StringIO()
        ascii.write(tbl, buf, format="html", **write_kwargs)
        return buf.getvalue()

    source = Table([[1], [1]], names=("a", "b"), masked=True, dtype=("i4", "i8"))
    # Mask column "a"; column "b" keeps its value
    source["a"] = np.ma.masked
    replaced = to_html(source, fill_values=(ascii.masked, "TEST"))

    reference = Table([["TEST"], [1]], names=("a", "b"))
    assert replaced == to_html(reference)


def test_multicolumn_table_html_fill_values():
    """
    Check that ``fill_values`` replacements are applied to the elements
    of multidimensional columns when writing HTML.
    """

    def to_html(tbl, **write_kwargs):
        # Render ``tbl`` with the HTML writer and return the text
        buf = StringIO()
        ascii.write(tbl, buf, format="html", **write_kwargs)
        return buf.getvalue()

    names = ("C1", "C2", "C3")
    scalars = [1, 2, 3]
    pairs = [(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)]

    source_triples = [("a", "a", "a"), ("b", "b", "b"), ("c", "c", "c")]
    source = Table([scalars, pairs, source_triples], names=names)
    replaced = to_html(source, fill_values=("a", "z"))

    # Same table with every "a" already replaced by "z"
    reference_triples = [("z", "z", "z"), ("b", "b", "b"), ("c", "c", "c")]
    reference = Table([scalars, pairs, reference_triples], names=names)
    assert replaced == to_html(reference)


def test_multi_column_write_table_html_fill_values_masked():
    """
    Check that a ``fill_values`` entry keyed on ``ascii.masked``
    replaces only masked entries in a table with several columns.

    Column "b" also holds the literal string "--" in an unmasked row,
    which must be written unchanged.
    """
    buffer_output = StringIO()
    t = Table([[1, 2, 3, 4], ["--", "a", "--", "b"]], names=("a", "b"), masked=True)
    # Mask the first two rows of both columns
    t["a"][0:2] = np.ma.masked
    t["b"][0:2] = np.ma.masked
    ascii.write(t, buffer_output, fill_values=[(ascii.masked, "MASKED")], format="html")

    t_expected = Table(
        [["MASKED", "MASKED", 3, 4], ["MASKED", "MASKED", "--", "b"]], names=("a", "b")
    )
    buffer_expected = StringIO()
    ascii.write(t_expected, buffer_expected, format="html")

    assert buffer_output.getvalue() == buffer_expected.getvalue()


def test_write_table_formatted_columns():
    """
    Check that the HTML writer applies the per-column format strings
    passed through ``formats``.
    """
    column_formats = {"C1": "04d", "C2": ".2e"}
    integers = [1, 2]
    floats = [1.234567e-11, -9.876543e11]
    tbl = Table([integers, floats], names=column_formats.keys())

    with StringIO() as buf:
        tbl.write(buf, format="html", formats=column_formats)
        rendered = buf.getvalue().strip()

    reference = """\
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="text/html;charset=UTF-8" http-equiv="Content-type"/>
 </head>
 <body>
  <table>
   <thead>
    <tr>
     <th>C1</th>
     <th>C2</th>
    </tr>
   </thead>
   <tr>
    <td>0001</td>
    <td>1.23e-11</td>
   </tr>
   <tr>
    <td>0002</td>
    <td>-9.88e+11</td>
   </tr>
  </table>
 </body>
</html>
    """
    assert rendered == reference.strip()


@pytest.mark.skipif(not HAS_BS4, reason="requires BeautifulSoup4")
def test_read_html_unicode():
    """
    Check that a non-ASCII character is read identically whether it is
    written as a numeric character reference or as a literal character.
    """
    rows = [
        "<table>",
        "<tr><td>&#x0394;</td></tr>",
        "<tr><td>Δ</td></tr>",
        "</table>",
    ]
    parsed = Table.read(rows, format="ascii.html")

    # Both rows must yield the same Greek capital delta
    assert np.all(parsed["col1"] == ["Δ", "Δ"])
